# reload data and refit models to wells data wells <- read.csv("ecol 563/wells.txt") out1 <- glm(cbind(y, n-y)~ land.use + sewer, data=wells, family=binomial) wells$land.use2 <- factor(ifelse(wells$land.use %in% c('agri','undev'), 'rural', as.character(wells$land.use))) wells$land.use3 <- factor(ifelse(wells$land.use2 %in% c('inst','recr','resL','resM','trans'), 'mixed', as.character(wells$land.use2))) wells$land.use4 <- factor(ifelse(wells$land.use3 %in% c('resH','comm','indus'), 'high.use', as.character(wells$land.use3))) out2d <- glm(cbind(y,n-y)~land.use4+sewer, data=wells, family=binomial) out2i <- glm(cbind(y,n-y) ~ land.use4 + sewer + nitrate, data=wells, family=binomial) # predict returns logits predict(out2d) # fitted returns probabilities fitted(out2d) # predict with type argument returnes probabilities # normal-based confidence intervals for them are not appropriate here predict(out2d, type='response', se.fit=T) # create data frame containing the six unique combinations of sewer and land use new.dat <- expand.grid(land.use4=levels(wells$land.use4), sewer=levels(wells$sewer)) # obtain estimated logits and their standard errors out.p <- predict(out2d, se.fit=T, newdata=new.dat) out.p # create data frame of logits, standard errors, and categories new.dat2 <- data.frame(new.dat, logit=out.p$fit, se=out.p$se) new.dat2 # calculate Wald-based confidence intervals for logits new.dat2$low95 <- new.dat2$logit + qnorm(.025)*new.dat2$se new.dat2$up95 <- new.dat2$logit + qnorm(.975)*new.dat2$se new.dat2 # inverse logit function inv.logit <- function(eta) exp(eta)/(1+exp(eta)) # invert logits and their confidence intervals apply(new.dat2[,c(3,5:6)], 2, inv.logit) # add probabilities and their confidence intervals to data frame p.dat <- data.frame(new.dat,apply(new.dat2[,c(3,5:6)], 2, inv.logit)) p.dat # relabel logit column names(p.dat)[3] <- 'prob' p.dat # obtaining confidence intervals using profile likelihood confint(out2d) # return logits for land use categories when sewer= no out2d1 <- glm(cbind(y,n-y)~land.use4+sewer-1, data=wells, family=binomial) coef(out2d) coef(out2d1) # obtain confidence intervals out.c1 <- confint(out2d1) out.c1 # define new sewer factor with sewer=yes the reference group wells$sewer2 <- factor(wells$sewer, levels=rev(levels(wells$sewer))) levels(wells$sewer) levels(wells$sewer2) # refit model using new sewer variable out2d2 <- glm(cbind(y,n-y)~land.use4+sewer2-1, data=wells, family=binomial) coef(out2d2) out.c2 <- confint(out2d2) out.c2 # collect results in a matrix out.profile <- data.frame(est=c(coef(out2d1)[1:3], coef(out2d2)[1:3]), rbind(out.c1[1:3,],out.c2[1:3,])) # obtain probabilities p.dat.profile <- data.frame(new.dat, inv.logit(out.profile)) names(p.dat.profile)[3:5] <- c('prob', 'low95', 'up95') p.dat.profile ### graph probabilities and their confidence intervals #### oldmar <- par("mar") par(mar=c(4.1,8.1,1.1,1.1)) p.dat$both <- factor(paste(p.dat$sewer, p.dat$land.use4, sep='.')) p.dat p.dat$num <- as.numeric(p.dat$both) p.dat plot(num~prob, data=p.dat, xlab='Probability of contamination', ylab='', xlim=c(0, 0.55), type='n', axes=F) axis(1) axis(2, at=1:6, labels=rep(c('high use', 'mixed use', 'rural'),2), las=2, cex.axis=.9) box() par(lend=1) segments(p.dat$low95, p.dat$num, p.dat$up95, p.dat$num, col='grey70', lwd=5) points(p.dat$prob, p.dat$num, pch='|', lwd=2, cex=1.5, col=1) mtext(side=2, at=c(2,5), text=c('Sewers absent','Sewers present'), line=5.5, cex=1.2) abline(h=3.5, lty=2) par(mar=oldmar) ### graphing probabilities with continuous predictors #### coef(out2i) # there are no common values of nitrate for all six categories tapply(wells$nitrate, list(wells$land.use4,wells$sewer), mean) tapply(wells$nitrate, list(wells$land.use4,wells$sewer), max) tapply(wells$nitrate, list(wells$land.use4,wells$sewer), min) # regression model for the logit logit.mod <- function(mixed,rural,sewer,x) coef(out2i)[1]+ coef(out2i)[2]*mixed+coef(out2i)[3]*rural+coef(out2i)[4]*sewer+ coef(out2i)[5]*x logit.mod(0,0,0,3) # inverse logit for continuous model prob.mod <- function(mixed,rural,sewer,x) exp(logit.mod(mixed,rural,sewer,x))/(1+exp(logit.mod(mixed,rural,sewer,x))) prob.mod(0,0,0,3) # graph over the range of the data range(wells$nitrate) out.range <- tapply(wells$nitrate, list(wells$land.use4, wells$sewer), range) plot(c(0.8,7),c(0,0.65), xlab='nitrate', ylab='Probability', type='n') curve(prob.mod(0,0,0,x), add=T, xlim=out.range[1,]$no , lwd=2) curve(prob.mod(1,0,0,x), add=T, col=2, xlim=out.range[2,]$no, lwd=2) curve(prob.mod(0,1,0,x), add=T, col=3, xlim=out.range[3,]$no, lwd=2) curve(prob.mod(0,0,1,x), add=T, lty=2, xlim=out.range[1,]$yes, lwd=2) curve(prob.mod(1,0,1,x), add=T, col=2, lty=2, xlim=out.range[2,]$yes, lwd=2) curve(prob.mod(0,1,1,x), add=T, col=3, lty=2, xlim=out.range[3,]$yes, lwd=2) legend('topleft', rep(c('high use', 'mixed use', 'rural'),2), col=rep(1:3,2), lty=rep(1:2,each=3), lwd=2, cex=.9, bty='n', ncol=2, title='Sewers absent Sewers present') # plotting log odds ratios with continuous and categorical predictors wells$land.use4a <- factor(wells$land.use4, levels=rev(levels(wells$land.use4))) levels(wells$land.use4) levels(wells$land.use4a) # refit model so all odds ratios are positive out2m <- glm(cbind(y,n-y)~land.use4a+sewer+nitrate, data=wells, family=binomial) out.conf <- confint(out2m) out.conf # log odds ratios and confidence intervals out.mod <- data.frame(var=1:4, est=coef(out2m)[2:5], out.conf[2:5,]) out.mod names(out.mod)[3:4] <- c('low95','up95') out.mod # create labels for log odds ratios mylabs <- c('land use:\nmixed use vs rural','land use:\nhigh use vs rural','sewer:\nyes vs no', 'nitrate:\n1 mg/l increase') # prepanel function myprepanel.ci <- function(x,y,lx,ux,subscripts,...){ list(xlim=range(x, ux[subscripts], lx[subscripts], finite=T,...))} library(lattice) xyplot(factor(var, labels=mylabs)~est, xlab='Log odds ratio', ylab='', lx=out.mod$low95, ux=out.mod$up95, data=out.mod, prepanel=myprepanel.ci, panel=function(x,y){ panel.segments(out.mod$low95, y, out.mod$up95, y, lwd=5, col='grey70', lineend=1) panel.xyplot(x, y, pch='|', cex=1.5, col=1, lwd=2) panel.abline(v=0, lty=2, col=2)}) # randomized block design with binomial response # read data in from clipboard: Windows crabs <- read.table('clipboard', header=T) # read data in from clipboard: Mac OS crabs <- read.table(pipe("pbpaste"), header=T) crabs[1:4,] # fixed effects randomized block design out1 <- glm(cbind(killed,6-killed)~factor(Treatment)+factor(Block_Number), data=crabs, family=binomial) summary(out1) # random effects randomized block design library(lme4) out1.lmer <- lmer(cbind(killed,6-killed)~factor(Treatment)+(1|Block_Number), data=crabs, family=binomial) summary(out1.lmer)
Created by Pretty R at inside-R.org